home *** CD-ROM | disk | FTP | other *** search
/ Enter 2005 December / enter-cd-12-2005.iso / Internet / SpamAware 4.0 / SpamAware-Setup.exe / {app} / rules / 20_head_tests.cf < prev    next >
Encoding:
Text File  |  2005-06-20  |  27.6 KB  |  560 lines

  1. # SpamAssassin rules file: header tests
  2. #
  3. # Please don't modify this file as your changes will be overwritten with
  4. # the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
  5. # See 'perldoc Mail::SpamAssassin::Conf' for details.
  6. #
  7. # <@LICENSE>
  8. # Copyright 2004 Apache Software Foundation
  9. # Licensed under the Apache License, Version 2.0 (the "License");
  10. # you may not use this file except in compliance with the License.
  11. # You may obtain a copy of the License at
  12. #     http://www.apache.org/licenses/LICENSE-2.0
  13. # Unless required by applicable law or agreed to in writing, software
  14. # distributed under the License is distributed on an "AS IS" BASIS,
  15. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. # See the License for the specific language governing permissions and
  17. # limitations under the License.
  18. # </@LICENSE>
  19. #
  20. ###########################################################################
  21.  
  22. require_version @@VERSION@@
  23.  
  24. header HEAD_LONG        eval:check_for_long_header()
  25. describe HEAD_LONG        Message headers are very long
  26.  
  27. header NO_REAL_NAME        From =~ /^["\s]*\<?\S+\@\S+\>?\s*$/
  28. describe NO_REAL_NAME        From: does not include a real name
  29.  
  30. header FROM_ENDS_IN_NUMS    From =~ /\d\d\@/
  31. describe FROM_ENDS_IN_NUMS    From: ends in numbers
  32.  
  33. header FROM_STARTS_WITH_NUMS    From =~ /^\d\d/
  34. describe FROM_STARTS_WITH_NUMS    From: starts with nums
  35.  
  36. # note: anchored for speed
  37. header FROM_HAS_MIXED_NUMS    From =~ /(?:^|\D)\d+[a-z]+\d+\S*\@/i
  38. describe FROM_HAS_MIXED_NUMS    From: contains numbers mixed in with letters
  39.  
  40. # idea from Robert Menschel <RMSA@Menschel.net>
  41. header FROM_HAS_MIXED_NUMS3    From:addr =~ /^[a-z]+\d+[a-z]+\d+[a-z]+\w*\@/i
  42. describe FROM_HAS_MIXED_NUMS3    From: contains numbers mixed in with letters
  43.  
  44. # Faked addresses tend to come from big public sites.  Stats show that
  45. # 5 digits is enough to get a 1.0 s/o ratio; 4 is too low (probably due
  46. # to folks called "jmason2002@yahoo.com" for example).
  47. header ADDR_NUMS_AT_BIGSITE    ALL =~ /^(To|From|Cc|Reply-To):\s*<?\S+\d{5,}\@(?:aol|bigfoot|compuserve|excite|hotmail|juno|prodigy|yahoo)\.(?:com|net|org)/mi
  48. describe ADDR_NUMS_AT_BIGSITE    Uses an address with lots of numbers, at a big ISP
  49.  
  50. header __FROM_JUST_NUMBER       From:addr =~ /^\d+\@/
  51. header __FROM_PHONE             From:addr =~ /^\d{3}(?:[-.]?\d{3}[-.]?\d{4}|\d{7})\@/
  52. meta FROM_ALL_NUMS              (__FROM_JUST_NUMBER && !__FROM_PHONE)
  53. describe FROM_ALL_NUMS          From an address that is all numbers (non-phone)
  54.  
  55. header FROM_OFFERS        From:addr =~ /\@\S*offers(?![eo]n\b)/i
  56. describe FROM_OFFERS        From address is "at something-offers"
  57.  
  58. header FROM_NO_USER        From =~ /(?:^\@|<\@| \@[^\)<]*$|<>)/ [if-unset: unset@unset.unset]
  59. describe FROM_NO_USER        From: has no local-part before @ sign
  60.  
  61. header TO_NO_USER        To =~ /(?:^\@|<\@| \@[^\)<]*$|<>)/ [if-unset: unset@unset.unset]
  62. describe TO_NO_USER        To: has no local-part before @ sign
  63.  
  64. header TO_EMPTY            To =~ /^\s*$/ [if-unset: UNSET]
  65. describe TO_EMPTY        To: is empty
  66.  
  67. header REPLY_TO_EMPTY        Reply-To =~ /^\s*$/ [if-unset: UNSET]
  68. describe REPLY_TO_EMPTY        Reply-To: is empty
  69.  
  70. header TO_ADDRESS_EQ_REAL    To =~ /^\s*"([^"@]+\@[^"@]+)"\s+<\1>\s*$/i
  71. describe TO_ADDRESS_EQ_REAL    To: repeats address as real name
  72.  
  73. # NOTE: this is what 100% valid undisclosed-recipients mails look like.
  74. # If this gets a high score, that's a bug!
  75. header UNDISC_RECIPS        To =~ /^undisclosed-recipients?:\s*;$/
  76. describe UNDISC_RECIPS        Valid-looking To "undisclosed-recipients"
  77.  
  78. # also 100% valid
  79. header FAKED_UNDISC_RECIPS    To =~ /undisclosed[_ ]*recipient(?:s[^:]|[^s])/i
  80. describe FAKED_UNDISC_RECIPS    Faked To "Undisclosed-Recipients"
  81.  
  82. header PLING_QUERY        Subject =~ /\?.*!|!.*\?/
  83. describe PLING_QUERY        Subject has exclamation mark and question mark
  84.  
  85. header SUBJ_HAS_UNIQ_ID        eval:check_for_unique_subject_id()
  86. describe SUBJ_HAS_UNIQ_ID    Subject contains a unique ID
  87.  
  88. header SUBJ_HAS_SPACES        Subject =~ /(?:\s{6}|\t\s|\s\t)\S/
  89. describe SUBJ_HAS_SPACES    Subject contains lots of white space
  90.  
  91. header SUBJ_ALL_CAPS        eval:subject_is_all_caps()
  92. describe SUBJ_ALL_CAPS        Subject is all capitals
  93.  
  94. header MSGID_SPAM_99X9XX99    MESSAGEID =~ /^<\d\d\d\d\d\d[a-z]\d[a-z][a-z]\d\d\$[a-z][a-z][a-z]\d\d\d\d\d\$\d\d\d\d\d\d\d\d\@/
  95. describe MSGID_SPAM_99X9XX99    Spam tool Message-Id: (99x9xx99 variant)
  96.  
  97. header MSGID_SPAM_ALPHA_NUM    MESSAGEID =~ /<[A-Z]{7}-000[0-9]{10}\@[a-z]*>/
  98. describe MSGID_SPAM_ALPHA_NUM    Spam tool Message-Id: (alpha-numeric variant)
  99.  
  100. header MSGID_SPAM_CAPS        Message-ID =~ /^\s*<?[A-Z]+\@(?!(?:mailcity|whowhere)\.com)/
  101. describe MSGID_SPAM_CAPS    Spam tool Message-Id: (caps variant)
  102.  
  103. header MSGID_SPAM_LETTERS    Message-Id =~ /<[a-z]{5,}\@(\S+\.)+\S+>/
  104. describe MSGID_SPAM_LETTERS    Spam tool Message-Id: (letters variant)
  105.  
  106. header MSGID_SPAM_ZEROES    MESSAGEID =~ /<0000[0-9a-f]{8}\$0000[0-9a-f]{4}\$0000[0-9a-f]{4}\@/
  107. describe MSGID_SPAM_ZEROES    Spam tool Message-Id: (12-zeroes variant)
  108.  
  109. header MSGID_NO_HOST            MESSAGEID =~ /\@>(?:$|\s)/m
  110. describe MSGID_NO_HOST         Message-Id has no hostname
  111.  
  112. header MSGID_OUTLOOK_INVALID    eval:check_outlook_message_id()
  113. describe MSGID_OUTLOOK_INVALID    Message-Id is fake (in Outlook Express format)
  114.  
  115. # catches a few spams missed by MSGID_OUTLOOK_INVALID: fires when __OUTLOOK_DOLLARS_MSGID hits but X-Mailer does not mention MSCRM or Microsoft CDO/Outlook/Office Outlook. NOTE(review): __OUTLOOK_DOLLARS_MSGID and __UNUSABLE_MSGID are not defined in this file; presumably another rules file provides them -- confirm.
  116. header __HAS_OUTLOOK_IN_MAILER    X-Mailer =~ /\bMSCRM\b|Microsoft (?:CDO|Outlook|Office Outlook)\b/
  117. meta MSGID_DOLLARS        (__OUTLOOK_DOLLARS_MSGID && !__HAS_OUTLOOK_IN_MAILER && !__UNUSABLE_MSGID)
  118. describe MSGID_DOLLARS        Message-Id has pattern used in spam
  119.  
  120. # bit of a ratware rule, but catches a bit more than just the one ratware
  121. header __MSGID_RANDY        Message-ID =~ /<[a-z\d][a-z\d\$-]{10,29}[a-z\d]\@[a-z\d][a-z\d.]{3,12}[a-z\d]>/
  122. # heuristics to eliminate most good Message-ID formats: an 8-hex-digit word, a run of 10 digits, or a digit-free / dotted-quad host; all three sub-rules must be consulted by the meta below
  123. header __MSGID_OK_HEX        Message-ID =~ /\b[a-f\d]{8}\b/
  124. header __MSGID_OK_DIGITS    Message-ID =~ /\d{10}/
  125. header __MSGID_OK_HOST        Message-ID =~ /\@(?:\D{2,}|(?:\d{1,3}\.){3}\d{1,3})>/
  126. meta MSGID_RANDY    (__MSGID_RANDY && !(__MSGID_OK_HEX || __MSGID_OK_DIGITS || __MSGID_OK_HOST))
  127. describe MSGID_RANDY        Message-Id has pattern used in spam
  128.  
  129. # bug 3395 (the dot in yahoo\.com is escaped so only the literal domain matches)
  130. header MSGID_YAHOO_CAPS        Message-ID =~ /<[A-Z]+\@yahoo\.com>/
  131. describe MSGID_YAHOO_CAPS    Message-ID has ALLCAPS@yahoo.com
  132.  
  133. ###########################################################################
  134.  
  135. header __MSGID_BEFORE_RECEIVED    ALL =~ /\nMessage-Id:.*\nReceived:/si
  136. header __MSGID_BEFORE_OKAY    Message-Id =~ /\@[a-z0-9.-]+\.(?:yahoo|wanadoo)(?:\.[a-z]{2,3}){1,2}>/
  137. meta MSGID_FROM_MTA_HEADER    (__MSGID_BEFORE_RECEIVED && !__MSGID_BEFORE_OKAY)
  138. describe MSGID_FROM_MTA_HEADER    Message-Id was added by a relay
  139.  
  140. header MSGID_FROM_MTA_ID    eval:message_id_from_mta()
  141. describe MSGID_FROM_MTA_ID    Message-Id for external message added locally
  142.  
  143. header MSGID_FROM_MTA_HOTMAIL    Message-Id =~ /<MC\d{1,2}-F{1,2}\w{21,22}\@\S*hotmail\.com>/
  144. describe MSGID_FROM_MTA_HOTMAIL    Message-Id was added by a hotmail.com relay
  145.  
  146. ###########################################################################
  147.  
  148. header DATE_SPAMWARE_Y2K    Date =~ /^[A-Z][a-z]{2}, \d\d [A-Z][a-z]{2} [0-6]\d \d\d:\d\d:\d\d [A-Z]{3}$/
  149. describe DATE_SPAMWARE_Y2K    Date header uses unusual Y2K formatting
  150.  
  151. header INVALID_DATE        Date !~ /^\s*(?:(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun), )?[0-3 ]?[0-9] (?:Jan|Feb|Ma[ry]|Apr|Ju[nl]|Aug|Sep|Oct|Nov|Dec) (?:[12][901])?[0-9]{2} [0-2][0-9](?:\:[0-5][0-9]){1,2} (?:[+-][0-9]{4}|UT|[A-Z]{2,3}T)(?:\s+\(.*\))?\s*$/ [if-unset: Wed, 31 Jul 2002 16:41:57 +0200]
  152. describe INVALID_DATE        Invalid Date: header (not RFC 2822)
  153.  
  154. # flag numeric offsets of 1400 or more (either sign) at the end of Date:; +1300 (NZ timezone) is deliberately allowed
  155. header INVALID_DATE_TZ_ABSURD    Date =~ /[-+](?:1[4-9]\d\d|[2-9]\d\d\d)$/
  156. describe INVALID_DATE_TZ_ABSURD    Invalid Date: header (timezone does not exist)
  157.  
  158. header INVALID_TZ_CST        ALL =~ /[+-]\d\d[30]0(?<!-0600|-0500|\+0800|\+0930|\+1030)\s+(?:\bCST\b|\(CST\))/
  159. describe INVALID_TZ_CST        Invalid date in header (wrong CST timezone)
  160.  
  161. header INVALID_TZ_EST        ALL =~ /[+-]\d\d[30]0(?<!-0500|-0300|\+1000|\+1100)\s+(?:\bEST\b|\(EST\))/
  162. describe INVALID_TZ_EST        Invalid date in header (wrong EST timezone)
  163.  
  164. header INVALID_TZ_GMT        ALL =~ /[+-]\d\d[30]0(?<![+-]0000)\s+(?:\b(?:GMT|UTC)\b(?![\w+-])|\((?:GMT|UTC)\))/
  165. describe INVALID_TZ_GMT        Invalid date in header (wrong GMT/UTC timezone)
  166.  
  167. header DATE_IN_PAST_03_06    eval:check_for_shifted_date('-6', '-3')
  168. describe DATE_IN_PAST_03_06    Date: is 3 to 6 hours before Received: date
  169.  
  170. header DATE_IN_PAST_06_12    eval:check_for_shifted_date('-12', '-6')
  171. describe DATE_IN_PAST_06_12    Date: is 6 to 12 hours before Received: date
  172.  
  173. header DATE_IN_PAST_12_24    eval:check_for_shifted_date('-24', '-12')
  174. describe DATE_IN_PAST_12_24    Date: is 12 to 24 hours before Received: date
  175.  
  176. header DATE_IN_PAST_24_48    eval:check_for_shifted_date('-48', '-24')
  177. describe DATE_IN_PAST_24_48    Date: is 24 to 48 hours before Received: date
  178.  
  179. header DATE_IN_PAST_48_96    eval:check_for_shifted_date('-96', '-48')
  180. describe DATE_IN_PAST_48_96    Date: is 48 to 96 hours before Received: date
  181.  
  182. header DATE_IN_PAST_96_XX    eval:check_for_shifted_date('undef', '-96')
  183. describe DATE_IN_PAST_96_XX    Date: is 96 hours or more before Received: date
  184.  
  185. header DATE_IN_FUTURE_03_06    eval:check_for_shifted_date('3', '6')
  186. describe DATE_IN_FUTURE_03_06    Date: is 3 to 6 hours after Received: date
  187.  
  188. header DATE_IN_FUTURE_06_12    eval:check_for_shifted_date('6', '12')
  189. describe DATE_IN_FUTURE_06_12    Date: is 6 to 12 hours after Received: date
  190.  
  191. header DATE_IN_FUTURE_12_24    eval:check_for_shifted_date('12', '24')
  192. describe DATE_IN_FUTURE_12_24    Date: is 12 to 24 hours after Received: date
  193.  
  194. header DATE_IN_FUTURE_24_48    eval:check_for_shifted_date('24', '48')
  195. describe DATE_IN_FUTURE_24_48    Date: is 24 to 48 hours after Received: date
  196.  
  197. header DATE_IN_FUTURE_48_96    eval:check_for_shifted_date('48', '96')
  198. describe DATE_IN_FUTURE_48_96    Date: is 48 to 96 hours after Received: date
  199.  
  200. header DATE_IN_FUTURE_96_XX    eval:check_for_shifted_date('96', 'undef')
  201. describe DATE_IN_FUTURE_96_XX    Date: is 96 hours or more after Received: date
  202.  
  203. header UNRESOLVED_TEMPLATE    ALL =~ /^(?!(?i:X-UIDL|X-Face|To|Cc|From|Subject|References|In-Reply-To|(?:X-|Resent-|X-Original-)?Message-Id):)[\w-]{1,24}:(?:[^\n]{0,100}|\n[ \t]){0,2}%[A-Z][A-Z_-]/m
  204. describe UNRESOLVED_TEMPLATE    Headers contain an unresolved template
  205.  
  206. ###########################################################################
  207. # illegal characters that should be MIME encoded
  208. # might want to exempt users using languages that don't use Latin
  209. # alphabets, but do it in the eval
  210.  
  211. header SUBJ_ILLEGAL_CHARS    eval:check_illegal_chars('Subject','0.00','2')
  212. describe SUBJ_ILLEGAL_CHARS    Subject contains too many raw illegal characters
  213.  
  214. header FROM_ILLEGAL_CHARS    eval:check_illegal_chars('From','0.20','2')
  215. describe FROM_ILLEGAL_CHARS    From contains too many raw illegal characters
  216.  
  217. header HEAD_ILLEGAL_CHARS    eval:check_illegal_chars('ALL','0.005','2')
  218. describe HEAD_ILLEGAL_CHARS    Header contains too many raw illegal characters
  219.  
  220. ###########################################################################
  221. # ADV tags in various languages
  222.  
  223. header ENGLISH_UCE_SUBJECT    Subject =~ /^[^0-9a-z]*adv(?:ert)?\b/i
  224. describe ENGLISH_UCE_SUBJECT    Subject contains an English UCE tag
  225.  
  226. # alan premselaar <alien@12inch.com>, see SpamAssassin-talk list 2003-03
  227. # quinlan: 2003-03-23 here are more generic Japanese iso-2022-jp codes
  228. # ("not yet acceptance" or "email") + "announcement"
  229. # FWIW, according to Peter Evans, this should be sufficient to catch the
  230. # UCE tag and a common attempt at evasion (using the "sue" instead of
  231. # "mi" Chinese character).
  232. header JAPANESE_UCE_SUBJECT    Subject =~ /\e\$B.*(?:L\$>5Bz|EE;R%a!<%k)9-9p/
  233. describe JAPANESE_UCE_SUBJECT    Subject contains a Japanese UCE tag
  234.  
  235. # quinlan: "advertisement" in Russian KOI8-R
  236. # (no longer common, but worth noting in future)
  237. #header RUSSIAN_UCE_SUBJECT    Subject =~ /\xf0\xe5\xea\xeb\xe0\xec\xf3/
  238. #describe RUSSIAN_UCE_SUBJECT    Subject contains a Russian UCE tag
  239.  
  240. # Korean UCE Subject: lines are usually 8-bit, but are occasionally encoded
  241. # with quoted-printable or base64.
  242. #
  243. # \xbc\xba\xc0\xce means "adult"
  244. # \xb1\xa4\xb0\xed means "advertisement"
  245. # \xc1\xa4\xba\xb8 means "information"
  246. # \xc8\xab\xba\xb8 means "publicity"
  247. #
  248. # Each two byte sequence is one Korean letter; the spaces and periods are
  249. # sometimes used to obscure the words.  \xb1\xa4\xb0\xed is the most common
  250. # tag and is sometimes very obscured so we look harder.
  251. #
  252. header KOREAN_UCE_SUBJECT    Subject =~ /[({[<][. ]*(?:\xbc\xba[. ]*\xc0\xce[. ]*)?(?:\xb1\xa4(?:[. ]*|[\x00-\x7f]{0,3})\xb0\xed|\xc1\xa4[. ]*\xba\xb8|\xc8\xab[. ]*\xba\xb8)[. ]*[)}\]>]/
  253. describe KOREAN_UCE_SUBJECT    Subject: contains Korean unsolicited email tag
  254.  
  255. ###########################################################################
  256.  
  257. header FROM_AND_TO_SAME        eval:check_for_from_to_same()
  258. describe FROM_AND_TO_SAME    From and To are the same, but not exactly
  259.  
  260. header FORGED_RCVD_HELO        eval:check_for_forged_received_helo()
  261. describe FORGED_RCVD_HELO    Received: contains a forged HELO
  262.  
  263. header RCVD_HELO_IP_MISMATCH    eval:helo_ip_mismatch()
  264. describe RCVD_HELO_IP_MISMATCH    Received: HELO and IP do not match, but should
  265.  
  266. header RCVD_NUMERIC_HELO    eval:check_for_numeric_helo()
  267. describe RCVD_NUMERIC_HELO    Received: contains an IP address used for HELO
  268.  
  269. header RCVD_ILLEGAL_IP        eval:check_for_illegal_ip()
  270. describe RCVD_ILLEGAL_IP    Received: contains illegal IP address
  271.  
  272. # no legit mailer claims that their mailserver has no name
  273. # overlaps with RCVD_DOUBLE_IP*, but let's see how it is scored
  274. header RCVD_BY_IP    Received =~ /\bby\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(?<!127\.0\.0\.1)\b/
  275. describe RCVD_BY_IP    Received by mail server with no name
  276.  
  277. # two reliable signatures
  278. header __DOUBLE_IP_SPAM_1    Received =~ /from \[\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\] by \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} with/
  279. header __DOUBLE_IP_SPAM_2    Received =~ /from\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s+by\s+\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3};/
  280. # loose match
  281. header __DOUBLE_IP_LOOSE    Received =~ /(?:\b(?:from|by)\b.{1,4}\b\d{1,3}[._-]\d{1,3}[._-]\d{1,3}[._-]\d{1,3}(?<!127\.0\.0\.1)\b.{0,4}){2}/i
  282. # spam signature
  283. meta RCVD_DOUBLE_IP_SPAM    (__DOUBLE_IP_SPAM_1 || __DOUBLE_IP_SPAM_2)
  284. describe RCVD_DOUBLE_IP_SPAM    Bulk email fingerprint (double IP) found
  285. # other matches
  286. meta RCVD_DOUBLE_IP_LOOSE    (__DOUBLE_IP_LOOSE && !RCVD_DOUBLE_IP_SPAM)
  287. describe RCVD_DOUBLE_IP_LOOSE   Received: by and from look like IP addresses
  288.  
  289. header FORGED_AOL_RCVD            eval:check_for_fake_aol_relay_in_rcvd()
  290. describe FORGED_AOL_RCVD    Received forged, contains fake AOL relays
  291.  
  292. header FORGED_TELESP_RCVD    Received =~ /\.(?!br).. \(\d+-\d+-\d+-\d+\.dsl\.telesp\.net\.br /
  293. describe FORGED_TELESP_RCVD    Contains forged hostname for a DSL IP in Brazil
  294.  
  295. # a forged Hotmail message; host HELO'd as hotmail.com, but it wasn't
  296. header FORGED_HOTMAIL_RCVD    eval:check_for_forged_hotmail_received_headers()
  297. describe FORGED_HOTMAIL_RCVD    Forged hotmail.com 'Received:' header found
  298.  
  299. # this, by comparison is more common: from was @hotmail.com, but it wasn't
  300. header FORGED_HOTMAIL_RCVD2    eval:check_for_no_hotmail_received_headers()
  301. describe FORGED_HOTMAIL_RCVD2 hotmail.com 'From' address, but no 'Received:'
  302.  
  303. header FORGED_EUDORAMAIL_RCVD    eval:check_for_forged_eudoramail_received_headers()
  304. describe FORGED_EUDORAMAIL_RCVD    Forged eudoramail.com 'Received:' header found
  305.  
  306. header FORGED_YAHOO_RCVD    eval:check_for_forged_yahoo_received_headers()
  307. describe FORGED_YAHOO_RCVD    'From' yahoo.com does not match 'Received' headers
  308.  
  309. header FORGED_JUNO_RCVD        eval:check_for_forged_juno_received_headers()
  310. describe FORGED_JUNO_RCVD    'From' juno.com does not match 'Received' headers
  311.  
  312. header FORGED_GW05_RCVD        eval:check_for_forged_gw05_received_headers()
  313. describe FORGED_GW05_RCVD    Forged 'by gw05' 'Received:' header found
  314.  
  315. # not used directly right now due to FPs; but CONFIRMED_FORGED turns it
  316. # into a 1.0 S/O rule anyway, so that's not a problem ;)
  317. # 2.626   3.6340   1.5251    0.704   0.34    1.44  FORGED_RCVD_TRAIL
  318. # 0.956   3.3890   0.0000    1.000   0.98    4.30  CONFIRMED_FORGED
  319. header __FORGED_RCVD_TRAIL    eval:check_for_forged_received_trail()
  320.  
  321. # forgery meta-rules: more reliable than their inputs. CONFIRMED_FORGED requires __FORGED_RCVD_TRAIL plus at least one FORGED_* hit. NOTE(review): FORGED_MX_HOTMAIL is not defined in this file -- confirm another rules file provides it.
  322. meta CONFIRMED_FORGED        (__FORGED_RCVD_TRAIL && (FORGED_AOL_RCVD || FORGED_HOTMAIL_RCVD || FORGED_EUDORAMAIL_RCVD || FORGED_YAHOO_RCVD || FORGED_JUNO_RCVD || FORGED_GW05_RCVD || FORGED_MX_HOTMAIL))
  323. describe CONFIRMED_FORGED    Received headers are forged
  324.  
  325. meta MULTI_FORGED        ((FORGED_AOL_RCVD + FORGED_HOTMAIL_RCVD + FORGED_EUDORAMAIL_RCVD + FORGED_YAHOO_RCVD + FORGED_JUNO_RCVD + FORGED_GW05_RCVD) > 1)
  326. describe MULTI_FORGED        Received headers indicate multiple forgeries
  327.  
  328. header NONEXISTENT_CHARSET    Content-Type =~ /charset=.?DEFAULT/
  329. describe NONEXISTENT_CHARSET    Character set doesn't exist
  330.  
  331. header CHARSET_FARAWAY_HEADER    eval:check_for_faraway_charset_in_headers()
  332. describe CHARSET_FARAWAY_HEADER    A foreign language charset used in headers
  333. tflags CHARSET_FARAWAY_HEADER    userconf
  334.  
  335. header X_PRIORITY_HIGH        X-Priority =~ /^1/
  336. describe X_PRIORITY_HIGH    Sent with 'X-Priority' set to high
  337.  
  338. header X_MSMAIL_PRIORITY_HIGH    X-Msmail-Priority =~ /^High/
  339. describe X_MSMAIL_PRIORITY_HIGH    Sent with 'X-Msmail-Priority' set to high
  340.  
  341. # this variant is local, using the Received hdr itself...
  342. header ROUND_THE_WORLD_LOCAL    eval:check_for_round_the_world_received_helo()
  343. describe ROUND_THE_WORLD_LOCAL    Received: says mail sent around the world (HELO)
  344.  
  345. # and this one uses a DNS reverse lookup.  so now we can use a version
  346. # of this test without a net connection, or in mass-check etc.
  347. header ROUND_THE_WORLD        eval:check_for_round_the_world_received_revdns()
  348. describe ROUND_THE_WORLD    Received: says mail sent around the world (DNS)
  349. tflags ROUND_THE_WORLD          net
  350.  
  351. header MISSING_DATE             Date =~ /^UNSET$/ [if-unset: UNSET]
  352. describe MISSING_DATE           Missing Date: header
  353.  
  354. # this is quite a common false positive, as it's legal to remove a To: but
  355. # leave a Cc:, so don't score it high.
  356. header MISSING_HEADERS        eval:check_for_missing_to_header()
  357. describe MISSING_HEADERS    Missing To: header
  358.  
  359. header __HAS_SUBJECT        exists:Subject
  360. meta MISSING_SUBJECT        !__HAS_SUBJECT
  361. describe MISSING_SUBJECT    Missing Subject: header
  362.  
  363. header SUSPICIOUS_RECIPS    eval:similar_recipients('0.65','undef')
  364. describe SUSPICIOUS_RECIPS    Similar addresses in recipient list
  365.  
  366. header SORTED_RECIPS        eval:sorted_recipients()
  367. describe SORTED_RECIPS        Recipient list is sorted by address
  368.  
  369. header GAPPY_SUBJECT        Subject =~ /\b(?:[a-z]([-_. =~\/:,*!\@\#\$\%\^&+;\"\'<>\\])\1{0,2}){4,}/i
  370. describe GAPPY_SUBJECT        Subject: contains G.a.p.p.y-T.e.x.t
  371.  
  372. ### header existence tests (description is added automatically)
  373.  
  374. # X-Fix example: NTMail fixed non RFC822 compliant EMail message
  375. #
  376. # X-PMFLAGS is all caps
  377. #
  378. # Headers that seem to only be used by a single spamming software and
  379. # are found together in the same message:
  380. # 1. X-MailingID and X-ServerHost
  381. # 2. X-Stormpost-To and X-List-Unsubscribe
  382. #
  383. # not spammish: X-EM-Registration, X-EM-Version, X-Antiabuse, X-List-Host,
  384. # X-Message-Id
  385. # bad FP rate: Comment, Date-warning
  386.  
  387. header X_LIBRARY        exists:X-Library
  388. describe X_LIBRARY        Message has X-Library header
  389.  
  390. header   __HAS_MIMEOLE          exists:X-MimeOLE
  391. header   __HAS_MSMAIL_PRI       exists:X-MSMail-Priority
  392. header   __HAS_SQUIRRELMAIL_IN_MAILER    X-Mailer =~ /SquirrelMail\b/
  393. meta     MISSING_MIMEOLE    (__HAS_MSMAIL_PRI && !__HAS_MIMEOLE && !__HAS_SQUIRRELMAIL_IN_MAILER)
  394. describe MISSING_MIMEOLE    Message has X-MSMail-Priority, but no X-MimeOLE
  395.  
  396. header __HAS_X_MAILER        exists:X-Mailer
  397.  
  398. header __IS_EXCH        X-MimeOLE =~ /Produced By Microsoft Exchange V/
  399.  
  400. header __HAS_X_PRIORITY     exists:X-Priority
  401. header __USER_AGENT             exists:User-Agent
  402. meta PRIORITY_NO_NAME        ((__HAS_X_PRIORITY || __HAS_MSMAIL_PRI) && !__HAS_X_MAILER && !__IS_EXCH && !__USER_AGENT)
  403. describe PRIORITY_NO_NAME    Message has priority, but no X-Mailer/User-Agent
  404.  
  405. header SUBJ_AS_SEEN        Subject =~ /\bAs Seen/i
  406. describe SUBJ_AS_SEEN        Subject contains "As Seen"
  407.  
  408. header SUBJ_DOLLARS             Subject =~ /^\$[0-9.,]+\b/
  409. describe SUBJ_DOLLARS           Subject starts with dollar amount
  410.  
  411. header SUBJ_FOR_ONLY         Subject =~ /For Only/i
  412. describe SUBJ_FOR_ONLY         Subject contains "For Only"
  413.  
  414. header SUBJ_FREE_CAP        Subject =~ /FRE{2,}|F.R.E.E\b/
  415. describe SUBJ_FREE_CAP        Subject contains "FREE" in CAPS
  416.  
  417. header SUB_FREE_OFFER           Subject =~ /^fre{2,}\b/i
  418. describe SUB_FREE_OFFER         Subject starts with "Free"
  419.  
  420. header SUBJ_GUARANTEED          Subject =~ /^guaranteed|(?-i:GUARANTEE)/i
  421. describe SUBJ_GUARANTEED        Subject GUARANTEED
  422.  
  423. header SUB_HELLO                Subject =~ /^hello\b/i
  424. describe SUB_HELLO              Subject starts with "Hello"
  425.  
  426. header SUBJ_LIFE_INSURANCE    Subject =~ /life\s+insurance/i
  427. describe SUBJ_LIFE_INSURANCE    Subject includes "life insurance"
  428.  
  429. header SUBJ_YOUR_DEBT        Subject =~ /Your (?:Bills|Debt|Credit)/i
  430. describe SUBJ_YOUR_DEBT        Subject contains "Your Bills" or similar
  431.  
  432. header SUBJ_YOUR_FAMILY        Subject =~ /Your Family/i
  433. describe SUBJ_YOUR_FAMILY    Subject contains "Your Family"
  434.  
  435. header SUBJ_YOUR_OWN        Subject =~ /Your Own/i
  436. describe SUBJ_YOUR_OWN        Subject contains "Your Own"
  437.  
  438. # the real services never HELO as 'foo.com', instead 'mail.foo.com' or
  439. # something like that.  Note: be careful when expanding this... legit dotcom
  440. # HELOers include: hotmail.com, drizzle.com, lockergnome.com.
  441. header RCVD_FAKE_HELO_DOTCOM    Received =~ /^from (?:msn|yahoo|yourwebsite|lycos|excite|cs|aol|localhost|koreanmail|allexecs|mydomain|juno|eudoramail|compuserve|desertmail|caramail)\.com \(/m
  442. describe RCVD_FAKE_HELO_DOTCOM  Received contains a faked HELO hostname
  443.  
  444. header ADDRESS_IN_SUBJECT    eval:check_for_to_in_subject('address')
  445. describe ADDRESS_IN_SUBJECT    To: address appears in Subject
  446.  
  447. header SUBJECT_DIET        Subject =~ /\bLose .*(?:pounds|lbs|weight)/i
  448. describe SUBJECT_DIET        Subject talks about losing pounds
  449.  
  450. header EXTRA_MPART_TYPE         Content-Type =~ /(?:\s*multipart\/)?.* type=/i
  451. describe EXTRA_MPART_TYPE       Header has extraneous Content-type:...type= entry
  452.  
  453. header TO_RECIP_MARKER          To =~ /\#recipient\#/
  454. describe TO_RECIP_MARKER        To header contains 'recipient' marker
  455.  
  456. # MIME boundary tests; spam tools use distinctive patterns.
  457. header MIME_BOUND_DD_DIGITS    Content-Type =~ /boundary=\"--\d+\"/
  458. describe MIME_BOUND_DD_DIGITS    Spam tool pattern in MIME boundary
  459. header MIME_BOUND_DIGITS_7    Content-Type =~ /boundary=\d{9}\.\d{13}/
  460. describe MIME_BOUND_DIGITS_7    Spam tool pattern in MIME boundary
  461. header MIME_BOUND_DIGITS_15    Content-Type =~ /boundary=\"\d{15,}\"/
  462. describe MIME_BOUND_DIGITS_15    Spam tool pattern in MIME boundary
  463. header MIME_BOUND_MANY_HEX    Content-Type =~ /boundary="[\da-f]{8}(?:-[\da-f]{4}){3}-[\da-f]{12}"/
  464. describe MIME_BOUND_MANY_HEX    Spam tool pattern in MIME boundary
  465. header __NEXTPART_ALL        Content-Type =~ /NextPart/
  466. header __NEXTPART_NORMAL    Content-Type =~ /="(?:----_?=_)?NextPart_[\dA-F]{3}(_[\dA-F]{3,8})?_[\dA-F]{8}\.[\dA-F]{8}"/
  467. meta MIME_BOUND_NEXTPART    (__NEXTPART_ALL && !__NEXTPART_NORMAL)
  468. describe MIME_BOUND_NEXTPART    Spam tool pattern in MIME boundary
  469. header MIME_BOUND_RKFINDY       Content-Type =~ /boundary=\"=_NextPart_2rfkindysadvnqw3nerasdf\"/
  470. describe MIME_BOUND_RKFINDY     Spam tool pattern in MIME boundary (rfkindy)
  471.  
  472. # note: the first alternation is anchored for speed
  473. header TO_MALFORMED             To !~ /(?:^|[^\S"])(?:(?:\"[^\"]+\"|\S+)\@\S+\.\S+|^\s*.+:\s*;|^\s*\"[^\"]+\":\s*;|^\s*\([^\)]*\)\s*$|<\S+(?:\!\S+){1,}>|^\s*$)/ [if-unset: unset@unset.unset]
  474. describe TO_MALFORMED           To: has a malformed address
  475.  
  476. # Most/all of these require that From addresses do not start with numbers.
  477. header FROM_NUM_AT_WEBMAIL    From:addr =~ /^\d\S+\@(?:msn\.com|flashmail\.com|mailexcite\.com|prodigy\.net|yahoo\.\S+|hotmail\.com|eudoramail\.com|aol\.com|excite\.com|email\.com|earthlink\.net|geocities\.com|hknetmail\.com|angelfire\.com)/i
  478. describe FROM_NUM_AT_WEBMAIL    From address is webmail, but starts with a number
  479.  
  480. header FROM_WEBMAIL_END_NUMS6    From:addr =~ /\d\d\d\d\d\d\@(?:aol|msn|bigfoot|compuserve|excite|hotmail|juno|prodigy|yahoo)\.(?:com|net|org)/i
  481. describe FROM_WEBMAIL_END_NUMS6    From webmail service and address ends in numbers
  482.  
  483. header ADDR_FREE              From =~ /\b(?-i:F)ree(?-i:[ A-Z]).*</i
  484. describe ADDR_FREE            From Address contains FREE
  485.  
  486. # common spam-dropping: To: C:\VICTIMS.txt@yourmx.org
  487. header TO_TXT            To =~ /\.txt[\'\"]?\@/i
  488. describe TO_TXT            Sent to a text file
  489.  
  490. header CHINA_HEADER             ALL =~ /\@china\.com/i
  491. describe CHINA_HEADER           Involves 'china.com'
  492.  
  493. header __CD                     exists:Content-Disposition
  494. header __CT                     exists:Content-Type
  495. header __CTE                    exists:Content-Transfer-Encoding
  496. header __MIME_VERSION           exists:MIME-Version
  497. header __CT_TEXT_PLAIN          Content-Type =~ /^text\/plain\b/i
  498. meta MIME_HEADER_CTYPE_ONLY     (!__CD && !__CTE && __CT && !__MIME_VERSION && !__CT_TEXT_PLAIN)
  499. describe MIME_HEADER_CTYPE_ONLY 'Content-Type' found without required MIME headers
  500.  
  501. header WITH_LC_SMTP        Received =~ /\swith\ssmtp;\s/
  502. describe WITH_LC_SMTP        Received line contains spam-sign (lowercase smtp)
  503.  
  504. header FROM_NO_LOWER        From:addr !~ /[a-z]/ [if-unset: x@example.com]
  505. describe FROM_NO_LOWER        From address has no lower-case characters
  506.  
  507. header SUBJ_BUY                 Subject =~ /^buy/i
  508. describe SUBJ_BUY               Subject line starts with Buy or Buying
  509.  
  510. header __FROM_HAS_UNDERLINE_NUMS    From =~ /_\S?(?:[a-z]+\w*?\d+|\d+\w*?[a-z]+)\w*\@/i
  511. meta FROM_HAS_ULINE_NUMS        (!REPLY_TO_ULINE_NUMS && __FROM_HAS_UNDERLINE_NUMS)
  512. describe FROM_HAS_ULINE_NUMS    From: contains an underline and numbers/letters
  513.  
  514. header NIGERIAN_SUBJECT1    Subject =~ /^(?:Re:|\[.{1,10}\])?\s*(?:(?:very )?URGENT|ATTENTION)\s*$/i
  515. describe NIGERIAN_SUBJECT1    Subject is indicative of a Nigerian spam
  516. header NIGERIAN_SUBJECT2    Subject =~ /^(?:Re:|\[.{1,10}\])?\s*(?:very )?urgent\s+(?:(?:and|&)\s+)?(?:confidential|assistance|business|attention|reply|response|help)\b/i
  517. describe NIGERIAN_SUBJECT2    Subject is indicative of a Nigerian spam
  518.  
  519. # this code uses an access database (sendmail, postfix, etc.)
  520. # Since you need to actively create an accessdb to use it, the rule is
  521. # considered userconf and is disabled by default.
  522. header ACCESSDB            eval:check_access_database('/etc/mail/access.db')
  523. describe ACCESSDB        Message would have been caught by accessdb
  524. tflags ACCESSDB            userconf
  525.  
  526. # seems to be ratware
  527. header RCVD_AM_PM        Received =~ /; [A-Z][a-z][a-z], \d{1,2} \d{4} \d{1,2}:\d\d:\d\d [AP]M [+-]\d{4}/
  528. describe RCVD_AM_PM        Received headers forged (AM/PM)
  529.  
  530. header HEADER_COUNT_CTYPE    eval:check_header_count_range('Content-Type','2','999')
  531. describe HEADER_COUNT_CTYPE    Multiple Content-Type headers found
  532.  
  533. header __USER_AGENT_MSN             X-Mailer =~ /^MSN Explorer /
  534.  
  535. header NO_RDNS_DOTCOM_HELO    eval:check_for_no_rdns_dotcom_helo()
  536. describe NO_RDNS_DOTCOM_HELO    Host HELO'd as a big ISP, but had no rDNS
  537.  
  538. header X_ORIG_IP_NOT_IPV4    X-Originating-IP !~ /\[?(?:\d{1,3}\.){3}\d{1,3}\]?/ [if-unset: 0.0.0.0]
  539. describe X_ORIG_IP_NOT_IPV4    X-Originating-IP doesn't look like IPv4 address
  540.  
  541. # match the format of a legit X-Auth-Warning header, and hit on fake ones
  542. # normal: "e4e.oac.uci.edu: foo owned process doing -bs"
  543. # fake: "bzgrdag, upaeqehv"
  544. header X_AUTH_WARN_FAKED    X-Authentication-Warning !~ /^(\S+\.\S+): / [if-unset: host.example.net: foo owned process doing -bs]
  545. describe X_AUTH_WARN_FAKED    X-Authentication-Warning header looks faked
  546.  
  547. # host no longer exists according to administrator
  548. header FAKE_OUTBLAZE_RCVD    Received =~ /\.mr\.outblaze\.com/
  549. describe FAKE_OUTBLAZE_RCVD    Received header contains faked 'mr.outblaze.com'
  550.  
  551. # domains no longer used for email, confirmed by administrator
  552. header FROM_NONSENDING_DOMAIN    From:addr =~ /\@(?:altavista\.com|eudora\.com)$/i
  553. describe FROM_NONSENDING_DOMAIN    Message is from domain that never sends email
  554.  
  555. header SUBJ_2_NUM_PARENS        Subject =~ /^\(\d+\).*\(\d+\)\s*$/
  556. describe SUBJ_2_NUM_PARENS      Subject contains common spam sign (2 numbers)
  557.